In [1]:
# load required libraries
options(stringsAsFactors = F)
options (repr.plot.width = 14, repr.plot.height = 6)
suppressPackageStartupMessages({
library(Seurat)
library(harmony)
library(ggplot2)
library(dplyr)
library(Matrix)
library(Hmisc)
library(ggsci)
library(viridis)
library(RColorBrewer)
library(ggrepel)
library(cowplot)
})
set.seed(123)
In [2]:
# load samples
sample_ctrl_220424 <- readRDS("../../1_CAD_Preprocessing/CTRL220424/rds/DCM1_clean_clustered.rds")
sample_ctrl_220424 <- subset(sample_ctrl_220424, idents = 2, invert = T)
sample_ctrl_220424$source <- "DCM1"
sample_ctrl_220424
sample_ctrl_220519 <- readRDS("../../1_CAD_Preprocessing/CTRL220519/rds/DCM2_clean_clustered.rds")
sample_ctrl_220519$source <- "DCM2"
sample_ctrl_220519
sample_ctrl_220615 <- readRDS("../../1_CAD_Preprocessing/CTRL220615/rds/DCM3_clean_clustered.rds")
sample_ctrl_220615$source <- "DCM3"
sample_ctrl_220615
An object of class Seurat 33538 features across 13422 samples within 1 assay Active assay: RNA (33538 features, 2000 variable features) 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 33538 features across 9030 samples within 1 assay Active assay: RNA (33538 features, 2000 variable features) 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 33538 features across 8825 samples within 1 assay Active assay: RNA (33538 features, 2000 variable features) 3 dimensional reductions calculated: pca, tsne, umap
In [3]:
# merge sample and normalize
scList <- list(sample_ctrl_220424, sample_ctrl_220519, sample_ctrl_220615)
sample <- merge(scList[[1]], scList[2:3])
sample <- subset(sample, nFeature_RNA >= 500 & nFeature_RNA <= 5000 & percent.mt <= 10)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)
# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample
Warning message in CheckDuplicateCellNames(object.list = objects): “Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.” Centering and scaling data matrix
An object of class Seurat 33538 features across 29660 samples within 1 assay Active assay: RNA (33538 features, 2000 variable features)
In [4]:
# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)
Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [5]:
# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Warning message: “The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation' This message will be shown once per session”
In [6]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_DCM.pdf", width = 12, height = 10)
In [7]:
# add resolution
sample <- FindClusters(sample, resolution = 1, verbose = FALSE)
DimPlot(sample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(sample, label = TRUE, reduction = "tsne", group.by = "ident")
In [6]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(sample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_DCM.pdf", width = 16, height = 10)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells): “The following requested variables were not found: HPGD5, TPASB1”
In [7]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_res1.pdf", width = 15, height = 10)
In [8]:
# explore marker genes
FeaturePlot(sample, label = T, features=c("LAMA2"), cols=c("grey90","red3"), reduction="umap", pt.size = 0.1)
In [9]:
# save the sample
saveRDS(sample, file = "rds/sample_DCM.rds")
In [10]:
# remove low quality clusters
subsample <- subset(sample, idents = c(3, 16, 10, 14, 15, 22, 23, 24, 25), invert = T)
subsample <- NormalizeData(subsample, normalization.method = "LogNormalize", scale.factor = 10000)
dim(GetAssayData(subsample, slot = "counts"))
# find variable genes and scale data by number of UMIs and mitochondrial gene percentage
subsample <- FindVariableFeatures(subsample, selection.method = "vst")
length(subsample@assays$RNA@var.features)
subsample <- ScaleData(subsample)
# run pca and harmony
subsample <- RunPCA(subsample, verbose = FALSE)
subsample <- RunHarmony(subsample, group.by.vars = "source", verbose = FALSE)
DimPlot(subsample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(subsample, ndims = 30)
- 33538
- 23608
2000
Centering and scaling data matrix Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
In [11]:
# dimension reduction and clustering
pca_dims <- 1:30
subsample <- RunTSNE(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- RunUMAP(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindNeighbors(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindClusters(subsample, resolution = 0.5, verbose = FALSE)
DimPlot(subsample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(subsample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
In [12]:
# quality control by cluster
VlnPlot(subsample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_subsample_by_cluster_DCM.pdf", width = 10, height = 10)
In [13]:
# add resolution
subsample <- FindClusters(subsample, resolution = 0.8, verbose = FALSE)
DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
In [14]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(subsample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_subsample_DCM.pdf", width = 16, height = 10)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells): “The following requested variables were not found: HPGD5, TPASB1”
In [15]:
# plot tsne and umap
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)
In [16]:
# explore marker genes
gene <- "LUM"
a <- FeaturePlot(subsample, label = T, features=gene, cols=c("grey90","red3"), reduction="tsne", pt.size = 0.1)
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)
c <- FeaturePlot(subsample, label = T, features=gene, cols=c("grey90","red3"), reduction="umap", pt.size = 0.1)
d <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
plot_grid(c, d, ncol = 2)
In [17]:
# save the subsample
saveRDS(subsample, file = "rds/subsample_DCM.rds")
In [18]:
# celltype assignment
subsample <- subset(subsample, idents = 18, invert = T) # remove cluster 18
cluster = c(0:17)
celltype = c("FB","EC","EC","T","SMC","SMC","PC","Mac","FB","LEC","PC",
"EC","EC","EC","MC","GC","GC","T")
subsample$celltype = plyr::mapvalues(x=Idents(subsample), from=cluster, to=celltype)
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "celltype")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "celltype")
plot_grid(a, b, ncol = 2)
In [19]:
# save the annotated subsample
saveRDS(subsample, file = "rds/subsample_DCM_annotated_level1.rds")
In [20]:
# calculate markers
markers_cluster <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_cluster, "meta/markers_DCM_by_cluster.csv")
In [21]:
# calculate celltype markers
subsample$celltype <- factor(subsample$celltype, levels = c("VEC","CEC","AEC","LEC","MC","FB","SMC","PC1","PC2","Mac","T","NKT","GC1","GC2"))
Idents(subsample) <- subsample$celltype
markers_celltype <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_celltype, "meta/markers_DCM_by_celltype.csv")
In [22]:
# list the session info
sessionInfo()
R version 4.3.1 (2023-06-16) Platform: x86_64-conda-linux-gnu (64-bit) Running under: Ubuntu 18.04.5 LTS Matrix products: default BLAS/LAPACK: /data/zju/ty/miniconda/envs/singlecell/lib/libopenblasp-r0.3.3.so; LAPACK version 3.8.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=zh_CN.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=zh_CN.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=zh_CN.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C time zone: Asia/Shanghai tzcode source: system (glibc) attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] cowplot_1.1.1 ggrepel_0.9.4 RColorBrewer_1.1-3 viridis_0.6.4 [5] viridisLite_0.4.2 ggsci_3.0.0 Hmisc_5.1-1 Matrix_1.6-1.1 [9] dplyr_1.1.4 ggplot2_3.5.1 harmony_0.1.1 Rcpp_1.0.11 [13] SeuratObject_4.1.4 Seurat_4.4.0 loaded via a namespace (and not attached): [1] rstudioapi_0.17.1 jsonlite_1.8.7 magrittr_2.0.3 [4] spatstat.utils_3.1-1 rmarkdown_2.25 farver_2.1.1 [7] ragg_1.2.5 vctrs_0.6.4 ROCR_1.0-11 [10] spatstat.explore_3.2-3 base64enc_0.1-3 htmltools_0.5.6.1 [13] Formula_1.2-5 sctransform_0.4.0 parallelly_1.36.0 [16] KernSmooth_2.23-22 htmlwidgets_1.6.2 ica_1.0-3 [19] plyr_1.8.9 plotly_4.10.2 zoo_1.8-12 [22] uuid_1.1-1 igraph_1.5.1 mime_0.12 [25] lifecycle_1.0.3 pkgconfig_2.0.3 R6_2.5.1 [28] fastmap_1.1.1 fitdistrplus_1.1-11 future_1.33.0 [31] shiny_1.7.5 digest_0.6.33 colorspace_2.1-0 [34] patchwork_1.3.0.9000 tensor_1.5 irlba_2.3.5.1 [37] textshaping_0.4.0 labeling_0.4.3 progressr_0.14.0 [40] fansi_1.0.5 spatstat.sparse_3.0-2 httr_1.4.7 [43] polyclip_1.10-6 abind_1.4-5 compiler_4.3.1 [46] withr_2.5.1 htmlTable_2.4.1 backports_1.4.1 [49] MASS_7.3-60 tools_4.3.1 foreign_0.8-85 [52] lmtest_0.9-40 httpuv_1.6.11 future.apply_1.11.0 [55] nnet_7.3-19 goftest_1.2-3 glue_1.6.2 [58] nlme_3.1-163 promises_1.2.1 grid_4.3.1 [61] pbdZMQ_0.3-10 checkmate_2.2.0 Rtsne_0.16 [64] cluster_2.1.4 reshape2_1.4.4 generics_0.1.3 [67] gtable_0.3.4 spatstat.data_3.0-1 tidyr_1.3.1 [70] data.table_1.14.8 sp_2.1-0 utf8_1.2.3 [73] spatstat.geom_3.2-5 RcppAnnoy_0.0.21 RANN_2.6.1 [76] pillar_1.9.0 stringr_1.5.0 IRdisplay_1.1 [79] later_1.3.1 splines_4.3.1 lattice_0.21-9 [82] survival_3.5-7 deldir_1.0-9 tidyselect_1.2.0 [85] miniUI_0.1.1.1 pbapply_1.7-2 knitr_1.44 [88] gridExtra_2.3 scattermore_1.2 xfun_0.40 [91] matrixStats_1.0.0 stringi_1.7.12 lazyeval_0.2.2 [94] evaluate_0.22 codetools_0.2-19 tibble_3.2.1 [97] cli_3.6.3 uwot_0.1.16 IRkernel_1.3.2 [100] rpart_4.1.21 systemfonts_1.1.0 xtable_1.8-4 [103] reticulate_1.34.0 repr_1.1.6 munsell_0.5.0 [106] globals_0.16.2 spatstat.random_3.1-6 png_0.1-8 [109] parallel_4.3.1 ellipsis_0.3.2 listenv_0.9.0 [112] scales_1.3.0 ggridges_0.5.4 leiden_0.4.3 [115] purrr_1.0.2 crayon_1.5.2 rlang_1.1.4
In [ ]: